*-------------------------------------------------------------------------------
*						Data Pre-Process
*-------------------------------------------------------------------------------

** Project directories (absolute paths on drive G:)
global Raw_data   "G:\project-finished\Descriptive\Data"
global App_data   "G:\project-finished\Descriptive\Appendix Data"
global Class_data "G:\project-finished\Descriptive\Classification"
global Work_lab   "G:\project-finished\Descriptive\Lab"
global Out_lab    "G:\project-finished\Descriptive\Out"

** Do not pause output waiting for a keypress
set more off

** Make Work_lab the working directory
cd "$Work_lab"

** Close any log that is still open (errors ignored), then start a fresh
** log in Out_lab, overwriting the previous one
capture log close
log using "$Out_lab\Pre-census", replace


**------------------------------------------------------------------------------
* Step1: Generate Data
**------------------------------------------------------------------------------
* Start from an empty session. One -clear all- is enough: it already drops
* the data in memory, so the extra -clear- calls were redundant.
clear all

* Industry crosswalk file. It is read three times below to build one lookup
* table per census wave; each table is keyed on the code variable that the
* corresponding wave is merged on.
local crosswalk "$Class_data\行业标准转换\census2000-2015_行业统一版本.dta"

tempfile match15 match00 match10

* Lookup for the 2015 wave: rows that carry a post11_3dig code.
use "`crosswalk'", clear
keep if post11_3dig != ""
* Fail fast: the later m:1 merge requires this key to be unique in the lookup.
isid post11_3dig
save "`match15'"

* Lookup for the 2000 wave: rows that carry a pre_94_3dig code.
use "`crosswalk'", clear
keep if pre_94_3dig != ""
isid pre_94_3dig
save "`match00'"

* Lookup for the 2010 wave: distinct (new, new_title) pairs.
use "`crosswalk'", clear
keep new new_title
duplicates drop
* -duplicates drop- only removes rows identical on both variables, so a code
* listed with two different titles would survive and break the m:1 merge.
* missok keeps a single blank-code row acceptable, as it was before.
isid new, missok
save "`match10'"

* For each census wave: harmonise the industry and occupation codes, save the
* worker-level file, then collapse to an occupation-level summary file.
* The three waves use different variable names, handled in the branches below.
foreach j in 2000 2010 2015 {
	use "$Raw_data\census`j'.dta", clear

	* Wave-specific variables and industry crosswalk
	if `j' == 2000 {
		* NOTE(review): age is not generated in this branch, so the 2000 file
		* presumably already contains it - confirm.

		* Keep employed workers only (occ 0 = at school)
		drop if occ == . | occ == 0
		tostring occ, generate(occ_`j')
		* Re-attach a leading zero to codes below 100.
		* NOTE(review): a code below 10 would get only one zero and end up
		* two characters long - confirm no such codes occur.
		replace occ_`j' = "0" + occ_`j' if occ < 100

		* Industry: same zero-padding, then map through the 2000 lookup.
		* keep(match) nogenerate keeps matched rows only and creates no merge
		* indicator, replacing the former keep/drop on an abbreviated _m.
		tostring industry, gen(pre_94_3dig)
		replace pre_94_3dig = "0" + pre_94_3dig if industry < 100
		merge m:1 pre_94_3dig using "`match00'", keep(match) nogenerate
	}
	else if `j' == 2010 {
		* NOTE(review): _出生年 is presumably the birth year - confirm.
		gen age = 2010 - _出生年

		* Keep workers with a recorded occupation
		drop if _职业 == .
		tostring _职业, generate(occ_`j')
		replace occ_`j' = "0" + occ_`j' if _职业 < 100

		* Industry: zero-pad, then map through the 2010 lookup
		tostring _行业, gen(new)
		replace new = "0" + new if _行业 < 100
		merge m:1 new using "`match10'", keep(match) nogenerate
	}
	else {
		* 2015 wave: occ and industry are used directly as string codes
		gen age = 2015 - birth_year

		* Keep workers with a recorded occupation
		drop if occ == ""
		gen occ_`j' = occ

		* Industry: map through the 2015 lookup
		gen post11_3dig = industry
		merge m:1 post11_3dig using "`match15'", keep(match) nogenerate
	}

	* Restrict to ages 15-64; a missing age fails inrange and is dropped,
	* exactly as with the former two-sided comparison
	keep if inrange(age, 15, 64)

	* Attach the consistent occupation category; the lookup file is read
	* from the current working directory. Matched rows only.
	merge m:1 occ_`j' using "`j'_occ_consistent_characteristics.dta", keep(match) nogenerate

	save "`j'census_consistent.dta", replace

	* Generate a new id; after the collapse below it holds the number of
	* workers in each occupation
	gen id_new = _n

	* Occupation level: worker count plus means of the index variables and
	* of education
	collapse (count) id_new (mean) index* education, by(consistent title_consistent)
	egen total = total(id_new)
	gen share = (id_new / total) * 100

	* Sort by descending share; rank 1 is the largest share
	gsort -share
	egen rank = rank(-share), track

	keep rank consistent title_consistent share index* education
	order rank consistent title_consistent share

	gen year = `j'

	compress
	rename (consistent title_consistent) (occ_number occ_name)

	save "whole_occ`j'.dta", replace
}

* Close the log opened at the top of the script
log close